Ejemplo tomado de: (fuente no especificada)

# Machine Learning
library(tidymodels)
## Registered S3 method overwritten by 'tune':
##   method                   from   
##   required_pkgs.model_spec parsnip
## ── Attaching packages ────────────────────────────────────── tidymodels 0.1.3 ──
## ✓ broom        0.7.9.9000     ✓ recipes      0.1.16    
## ✓ dials        0.0.9          ✓ rsample      0.1.0     
## ✓ dplyr        1.0.7          ✓ tibble       3.1.6     
## ✓ ggplot2      3.3.5          ✓ tidyr        1.1.3     
## ✓ infer        1.0.0          ✓ tune         0.1.6     
## ✓ modeldata    0.1.1          ✓ workflows    0.2.3     
## ✓ parsnip      0.1.7          ✓ workflowsets 0.1.0     
## ✓ purrr        0.3.4          ✓ yardstick    0.0.8
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## x purrr::discard() masks scales::discard()
## x dplyr::filter()  masks stats::filter()
## x dplyr::lag()     masks stats::lag()
## x recipes::step()  masks stats::step()
## • Use tidymodels_prefer() to resolve common conflicts.
library(modeltime)
library(modeltime.ensemble)
## Loading required package: modeltime.resample
library(modeltime.resample)

# Time Series
library(timetk)

# Core
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ✓ stringr 1.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x readr::col_factor() masks scales::col_factor()
## x purrr::discard()    masks scales::discard()
## x dplyr::filter()     masks stats::filter()
## x stringr::fixed()    masks recipes::fixed()
## x dplyr::lag()        masks stats::lag()
## x readr::spec()       masks yardstick::spec()
# Load data
library(readxl)

MODELO DE MACHINE LEARNING

COLONES

# Visualize the colones series (static plot, 3 facet columns).
# FALSE instead of F: T/F are ordinary variables and can be reassigned.
colones %>%
  plot_time_series(Date, value, .facet_ncol = 3, .interactive = FALSE)

DATA PREPARATION

# Forecast horizon: number of future periods (months, per the diagnostics
# output below) to predict.
FORECAST_HORIZON <- 5

Full = Training + Forecast Dataset

# Extend the series FORECAST_HORIZON periods into the future and bind the
# new rows (value = NA) to the historical data in one tibble.
# TRUE instead of T (T is reassignable shorthand).
full_data_tbl <- colones %>%
  select(Date, value) %>%
  future_frame(
    .date_var   = Date,
    .length_out = FORECAST_HORIZON,
    .bind_data  = TRUE
  )

Training Data

# Historical observations only: the future rows appended by future_frame()
# carry NA in `value`, so dropping NAs recovers the training span.
data_prepared_tbl <- full_data_tbl %>% filter(!is.na(value))

# Sanity-check the time index (n.obs, date range, scale, gaps).
data_prepared_tbl %>% tk_summary_diagnostics()
## tk_augment_timeseries_signature(): Using the following .date_var variable: Date
## # A tibble: 1 × 12
##   n.obs start      end        units scale tzone diff.minimum diff.q1 diff.median
##   <int> <date>     <date>     <chr> <chr> <chr>        <dbl>   <dbl>       <dbl>
## 1   246 2001-02-01 2021-07-01 days  month UTC        2419200 2592000     2678400
## # … with 3 more variables: diff.mean <dbl>, diff.q3 <dbl>, diff.maximum <dbl>

Future Data Forecast

# The forecast horizon: rows whose `value` is NA are the future dates
# appended by future_frame().
future_tbl <- full_data_tbl %>% filter(is.na(value))

SPLITTING

# Hold out the last FORECAST_HORIZON observations for testing.
# BUG FIX: the argument is `date_var`, not `data_var`. The misspelled name
# was silently absorbed by `...` and the date column auto-detected — see the
# "Using date_var: Date" message in the original output. Also TRUE, not T.
splits <- data_prepared_tbl %>%
  arrange(Date) %>%
  time_series_split(
    date_var   = Date,
    assess     = FORECAST_HORIZON,
    cumulative = TRUE
  )
splits
## <Analysis/Assess/Total>
## <241/5/246>

PREPROCESSOR

# Feature-engineering recipe: calendar features derived from Date, with Date
# itself kept as a predictor (recipe_spec_2 later demotes it to an ID role).
# TRUE instead of T throughout.
recipe_spec_1 <- recipe(value ~ ., training(splits)) %>%
  step_timeseries_signature(Date) %>%
  # Drop signature columns that add nothing at monthly frequency.
  # NOTE(review): the dots in this regex are unescaped, so ".iso" matches any
  # char + "iso"; harmless here, kept for compatibility with the original.
  step_rm(matches("(.iso$)|(.xts)|(day)|(hour)|(minute)|(second)|(am.pm)|(week)")) %>%
  step_normalize(Date_index.num, Date_year) %>%
  step_mutate(Date_month = factor(Date_month, ordered = TRUE)) %>%
  step_dummy(all_nominal(), one_hot = TRUE)

recipe_spec_1 %>% prep() %>% juice() %>% glimpse()
## Rows: 241
## Columns: 30
## $ Date              <date> 2001-02-01, 2001-03-01, 2001-04-01, 2001-05-01, 200…
## $ value             <dbl> 12637.44, 13569.26, 11895.00, 12882.29, 13393.90, 11…
## $ Date_index.num    <dbl> -1.720507, -1.707312, -1.692703, -1.678565, -1.66395…
## $ Date_year         <dbl> -1.656391, -1.656391, -1.656391, -1.656391, -1.65639…
## $ Date_half         <int> 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2…
## $ Date_quarter      <int> 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1, 1, 1, 2, 2, 2, 3…
## $ Date_month_01     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ Date_month_02     <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Date_month_03     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ Date_month_04     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Date_month_05     <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Date_month_06     <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Date_month_07     <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Date_month_08     <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_09     <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_10     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_11     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_12     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_01 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_02 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Date_month.lbl_03 <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ Date_month.lbl_04 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Date_month.lbl_05 <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Date_month.lbl_06 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Date_month.lbl_07 <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Date_month.lbl_08 <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_09 <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_10 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_11 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_12 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
# Same preprocessing, but Date becomes an identifier instead of a predictor,
# so engines that cannot consume date columns (e.g. xgboost) can use it.
recipe_spec_2 <- recipe_spec_1 %>% update_role(Date, new_role = "ID")

recipe_spec_2 %>% prep() %>% juice() %>% glimpse()
## Rows: 241
## Columns: 30
## $ Date              <date> 2001-02-01, 2001-03-01, 2001-04-01, 2001-05-01, 200…
## $ value             <dbl> 12637.44, 13569.26, 11895.00, 12882.29, 13393.90, 11…
## $ Date_index.num    <dbl> -1.720507, -1.707312, -1.692703, -1.678565, -1.66395…
## $ Date_year         <dbl> -1.656391, -1.656391, -1.656391, -1.656391, -1.65639…
## $ Date_half         <int> 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2…
## $ Date_quarter      <int> 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1, 1, 1, 2, 2, 2, 3…
## $ Date_month_01     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ Date_month_02     <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Date_month_03     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ Date_month_04     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Date_month_05     <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Date_month_06     <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Date_month_07     <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Date_month_08     <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_09     <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_10     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_11     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_12     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_01 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_02 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Date_month.lbl_03 <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ Date_month.lbl_04 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Date_month.lbl_05 <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Date_month.lbl_06 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Date_month.lbl_07 <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Date_month.lbl_08 <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_09 <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_10 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_11 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_12 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
# Role summary: under recipe_spec_1, Date is still a predictor.
recipe_spec_1 %>% prep() %>% summary()
## # A tibble: 30 × 4
##    variable       type    role      source  
##    <chr>          <chr>   <chr>     <chr>   
##  1 Date           date    predictor original
##  2 value          numeric outcome   original
##  3 Date_index.num numeric predictor derived 
##  4 Date_year      numeric predictor derived 
##  5 Date_half      numeric predictor derived 
##  6 Date_quarter   numeric predictor derived 
##  7 Date_month_01  numeric predictor derived 
##  8 Date_month_02  numeric predictor derived 
##  9 Date_month_03  numeric predictor derived 
## 10 Date_month_04  numeric predictor derived 
## # … with 20 more rows
# Role summary: under recipe_spec_2, Date now carries role "ID".
recipe_spec_2 %>% prep() %>% summary()
## # A tibble: 30 × 4
##    variable       type    role      source  
##    <chr>          <chr>   <chr>     <chr>   
##  1 Date           date    ID        original
##  2 value          numeric outcome   original
##  3 Date_index.num numeric predictor derived 
##  4 Date_year      numeric predictor derived 
##  5 Date_half      numeric predictor derived 
##  6 Date_quarter   numeric predictor derived 
##  7 Date_month_01  numeric predictor derived 
##  8 Date_month_02  numeric predictor derived 
##  9 Date_month_03  numeric predictor derived 
## 10 Date_month_04  numeric predictor derived 
## # … with 20 more rows

MODELS

AutoARIMA + XGBoost

# Auto-ARIMA with XGBoost modeling the residuals, trained on the
# date-as-predictor recipe.
wflw_fit_autoarima_boost <- workflow() %>%
  add_model(
    arima_boost(min_n = 2, learn_rate = 0.015) %>%
      set_engine(engine = "auto_arima_xgboost")
  ) %>%
  add_recipe(recipe_spec_1) %>%
  fit(training(splits))
## frequency = 12 observations per 1 year

prophet

# Prophet with default settings on the date-as-predictor recipe.
wflw_fit_prophet <- workflow() %>%
  add_model(prophet_reg() %>% set_engine("prophet")) %>%
  add_recipe(recipe_spec_1) %>%
  fit(training(splits))
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.

XGBOOST

# XGBoost with a low learning rate.
# BUG FIX: the object name says 0.015 but the original passed
# learn_rate = 0.15; aligned the value with the name (the 0_1 / 0_3
# siblings below do match their names).
wflw_fit_xgboost_0_015 <- workflow() %>%
  add_model(
    boost_tree(learn_rate = 0.015) %>% set_engine("xgboost")
  ) %>%
  add_recipe(recipe_spec_2) %>%
  fit(training(splits))

# XGBoost, learning rate 0.1 (Date demoted to ID via recipe_spec_2).
wflw_fit_xgboost_0_1 <- workflow() %>%
  add_model(boost_tree(learn_rate = 0.1) %>% set_engine("xgboost")) %>%
  add_recipe(recipe_spec_2) %>%
  fit(training(splits))

# XGBoost, learning rate 0.3 (Date demoted to ID via recipe_spec_2).
wflw_fit_xgboost_0_3 <- workflow() %>%
  add_model(boost_tree(learn_rate = 0.3) %>% set_engine("xgboost")) %>%
  add_recipe(recipe_spec_2) %>%
  fit(training(splits))

Random Forest

# Random forest, 1000 trees.
wflw_fit_rf_1000 <- workflow() %>%
  add_model(rand_forest(trees = 1000) %>% set_engine("randomForest")) %>%
  add_recipe(recipe_spec_2) %>%
  fit(training(splits))

# Random forest, 500 trees.
wflw_fit_rf_500 <- workflow() %>%
  add_model(rand_forest(trees = 500) %>% set_engine("randomForest")) %>%
  add_recipe(recipe_spec_2) %>%
  fit(training(splits))

# Random forest, 200 trees.
wflw_fit_rf_200 <- workflow() %>%
  add_model(rand_forest(trees = 200) %>% set_engine("randomForest")) %>%
  add_recipe(recipe_spec_2) %>%
  fit(training(splits))

SVM

# Support vector machine with radial basis kernel (kernlab defaults).
wflw_fit_svm <- workflow() %>%
  add_model(svm_rbf() %>% set_engine("kernlab")) %>%
  add_recipe(recipe_spec_2) %>%
  fit(training(splits))

prophet_boost

# Prophet with XGBoost modeling the residuals; all Prophet seasonalities
# disabled so the boosted trees capture the monthly calendar features.
# FIXES: removed the trailing comma after the last argument (an empty
# argument in R — fragile, can raise "argument is empty" errors) and
# replaced F with FALSE.
wflw_fit_prophet_boost <- workflow() %>%
  add_model(
    prophet_boost(
      seasonality_yearly = FALSE,
      seasonality_weekly = FALSE,
      seasonality_daily  = FALSE
    ) %>%
      set_engine("prophet_xgboost")
  ) %>%
  add_recipe(recipe_spec_1) %>%
  fit(training(splits))

MODELTIME WORKFLOW

modeltime table

# Collect the selected submodels in a Modeltime Table (4 active models).
# Commented-out entries were evaluated but excluded; the trailing #1..#9
# are the author's model indices.
submodels_tbl <- modeltime_table(
  wflw_fit_autoarima_boost,
  #wflw_fit_prophet, #1
  wflw_fit_prophet_boost, #2
  #wflw_fit_xgboost_0_015, #3
  #wflw_fit_xgboost_0_1, #4
  wflw_fit_xgboost_0_3, #5
  #wflw_fit_rf_1000, #6
  wflw_fit_rf_500 #, #7
  #wflw_fit_rf_200, #8
  #wflw_fit_svm #9
)

submodels_tbl
## # Modeltime Table
## # A tibble: 4 × 3
##   .model_id .model     .model_desc                              
##       <int> <list>     <chr>                                    
## 1         1 <workflow> ARIMA(1,1,2)(2,0,0)[12] W/ XGBOOST ERRORS
## 2         2 <workflow> PROPHET W/ XGBOOST ERRORS                
## 3         3 <workflow> XGBOOST                                  
## 4         4 <workflow> RANDOMFOREST

Calibrate on Testing Data

# Calibration computes out-of-sample residuals on the 5-row test split,
# enabling accuracy metrics and confidence intervals downstream.
submodels_calibrated_tbl <- submodels_tbl %>%
  modeltime_calibrate(testing(splits))

submodels_calibrated_tbl
## # Modeltime Table
## # A tibble: 4 × 5
##   .model_id .model     .model_desc                               .type .calibration_da…
##       <int> <list>     <chr>                                     <chr> <list>          
## 1         1 <workflow> ARIMA(1,1,2)(2,0,0)[12] W/ XGBOOST ERRORS Test  <tibble [5 × 4]>
## 2         2 <workflow> PROPHET W/ XGBOOST ERRORS                 Test  <tibble [5 × 4]>
## 3         3 <workflow> XGBOOST                                   Test  <tibble [5 × 4]>
## 4         4 <workflow> RANDOMFOREST                              Test  <tibble [5 × 4]>

Measure Test Accuracy

# Test-set accuracy, best model (lowest RMSE) first.
submodels_calibrated_tbl %>%
  modeltime_accuracy() %>%
  arrange(rmse)
## # A tibble: 4 × 9
##   .model_id .model_desc              .type    mae  mape  mase smape   rmse   rsq
##       <int> <chr>                    <chr>  <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl>
## 1         1 ARIMA(1,1,2)(2,0,0)[12]… Test  32499.  3.20 0.568  3.21 37479. 0.876
## 2         2 PROPHET W/ XGBOOST ERRO… Test  52131.  5.27 0.911  5.21 64436. 0.963
## 3         3 XGBOOST                  Test  46464.  4.47 0.812  4.67 66341. 0.521
## 4         4 RANDOMFOREST             Test  55565.  5.30 0.971  5.52 71791. 0.938

Visualize test forecast

# Plot the test-window forecasts against the actual data.
# TRUE instead of T.
submodels_calibrated_tbl %>%
  modeltime_forecast(
    new_data    = testing(splits),
    actual_data = data_prepared_tbl,
    keep_data   = TRUE
  ) %>%
  plot_modeltime_forecast(.facet_ncol = 2)

Refit on full training dataset

# Refit every submodel on the full prepared dataset before forecasting
# beyond the observed range.
submodels_refit_tbl <- submodels_calibrated_tbl %>%
  modeltime_refit(data_prepared_tbl)
## frequency = 12 observations per 1 year

Visualize Submodel Forecast

# Forecast the FORECAST_HORIZON future months with the refit submodels.
# TRUE instead of T.
submodels_refit_tbl %>%
  modeltime_forecast(
    new_data    = future_tbl,
    actual_data = data_prepared_tbl,
    keep_data   = TRUE
  ) %>%
  plot_modeltime_forecast(.facet_ncol = 2)

Validación cruzada

https://cran.r-project.org/web/packages/modeltime.resample/vignettes/getting-started.html

# Time-series cross-validation: 5 slices, each with a 36-month training
# window and a FORECAST_HORIZON-month assessment window, sliding back by
# FORECAST_HORIZON months per slice.
resamples_tscv <- time_series_cv(
  data        = data_prepared_tbl,
  date_var    = Date,
  assess      = FORECAST_HORIZON,
  initial     = "36 month",
  skip        = FORECAST_HORIZON,
  slice_limit = 5
)

resamples_tscv
## # Time Series Cross Validation Plan 
## # A tibble: 5 × 2
##   splits         id    
##   <list>         <chr> 
## 1 <split [36/5]> Slice1
## 2 <split [36/5]> Slice2
## 3 <split [36/5]> Slice3
## 4 <split [36/5]> Slice4
## 5 <split [36/5]> Slice5
# Visualize the CV plan (train/test windows per slice).
# TRUE instead of T.
resamples_tscv %>%
  tk_time_series_cv_plan() %>%
  plot_time_series_cv_plan(Date,
                           value,
                           .facet_ncol  = 2,
                           .interactive = TRUE)
Generate Resample Predictions
# Fit every submodel on each CV slice and collect the resample predictions
# (these feed the meta-learner ensembles below).
resamples_fitted <- submodels_tbl %>%
  modeltime_fit_resamples(
    resamples = resamples_tscv,
    control   = control_resamples(verbose = FALSE)
  )

resamples_fitted
## # Modeltime Table
## # A tibble: 4 × 4
##   .model_id .model     .model_desc                               .resample_resul…
##       <int> <list>     <chr>                                     <list>          
## 1         1 <workflow> ARIMA(1,1,2)(2,0,0)[12] W/ XGBOOST ERRORS <rsmp[+]>       
## 2         2 <workflow> PROPHET W/ XGBOOST ERRORS                 <rsmp[+]>       
## 3         3 <workflow> XGBOOST                                   <rsmp[+]>       
## 4         4 <workflow> RANDOMFOREST                              <rsmp[+]>
Evaluate the Results
# Per-slice accuracy visualization. TRUE instead of T.
resamples_fitted %>%
  plot_modeltime_resamples(
    .point_size  = 3,
    .point_alpha = 0.8,
    .interactive = TRUE
  )
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
# Mean resample accuracy per model. TRUE instead of T.
resamples_fitted %>%
  modeltime_resample_accuracy(summary_fns = mean) %>%
  table_modeltime_accuracy(.interactive = TRUE)
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.

ENSEMBLE

Ensamble Media y Meta-Learner

# Simple average ensemble across all submodels in the table.
# (The commented filter shows how to drop individual models by id first.)
ensemble_fit_mean <- submodels_tbl %>%
  #filter(!.model_id %in% c(1))%>%
  ensemble_average(type="mean")


# Meta-learner: elastic-net (glmnet) stacked on the resample predictions,
# tuning penalty and mixture over a 2-candidate grid with 5-fold CV.
ensemble_fit_lm <- resamples_fitted %>%
  ensemble_model_spec(
    model_spec = linear_reg(penalty = tune(), mixture = tune()) %>%
      set_engine("glmnet"),
    grid    = 2,
    control = control_grid(verbose = TRUE)
  )
## ── Tuning Model Specification ───────────────────────────────────
## ℹ Performing 5-Fold Cross Validation.
## i Fold1: preprocessor 1/1
## ✓ Fold1: preprocessor 1/1
## i Fold1: preprocessor 1/1, model 1/2
## ✓ Fold1: preprocessor 1/1, model 1/2
## i Fold1: preprocessor 1/1, model 1/2 (predictions)
## i Fold1: preprocessor 1/1, model 2/2
## ✓ Fold1: preprocessor 1/1, model 2/2
## i Fold1: preprocessor 1/1, model 2/2 (predictions)
## i Fold2: preprocessor 1/1
## ✓ Fold2: preprocessor 1/1
## i Fold2: preprocessor 1/1, model 1/2
## ✓ Fold2: preprocessor 1/1, model 1/2
## i Fold2: preprocessor 1/1, model 1/2 (predictions)
## i Fold2: preprocessor 1/1, model 2/2
## ✓ Fold2: preprocessor 1/1, model 2/2
## i Fold2: preprocessor 1/1, model 2/2 (predictions)
## i Fold3: preprocessor 1/1
## ✓ Fold3: preprocessor 1/1
## i Fold3: preprocessor 1/1, model 1/2
## ✓ Fold3: preprocessor 1/1, model 1/2
## i Fold3: preprocessor 1/1, model 1/2 (predictions)
## i Fold3: preprocessor 1/1, model 2/2
## ✓ Fold3: preprocessor 1/1, model 2/2
## i Fold3: preprocessor 1/1, model 2/2 (predictions)
## i Fold4: preprocessor 1/1
## ✓ Fold4: preprocessor 1/1
## i Fold4: preprocessor 1/1, model 1/2
## ✓ Fold4: preprocessor 1/1, model 1/2
## i Fold4: preprocessor 1/1, model 1/2 (predictions)
## i Fold4: preprocessor 1/1, model 2/2
## ✓ Fold4: preprocessor 1/1, model 2/2
## i Fold4: preprocessor 1/1, model 2/2 (predictions)
## i Fold5: preprocessor 1/1
## ✓ Fold5: preprocessor 1/1
## i Fold5: preprocessor 1/1, model 1/2
## ✓ Fold5: preprocessor 1/1, model 1/2
## i Fold5: preprocessor 1/1, model 1/2 (predictions)
## i Fold5: preprocessor 1/1, model 2/2
## ✓ Fold5: preprocessor 1/1, model 2/2
## i Fold5: preprocessor 1/1, model 2/2 (predictions)
## ✓ Finished tuning Model Specification.
## ℹ Model Parameters:
## # A tibble: 1 × 8
##   penalty mixture .metric .estimator    mean     n std_err .config             
##     <dbl>   <dbl> <chr>   <chr>        <dbl> <int>   <dbl> <chr>               
## 1 0.00181   0.361 rmse    standard   107140.     5  11450. Preprocessor1_Model1
## ℹ Prediction Error Comparison:
## # A tibble: 5 × 3
##   .model_id    rmse .model_desc                              
##   <chr>       <dbl> <chr>                                    
## 1 1         128924. ARIMA(1,1,2)(2,0,0)[12] W/ XGBOOST ERRORS
## 2 2         116572. PROPHET W/ XGBOOST ERRORS                
## 3 3         121334. XGBOOST                                  
## 4 4         124179. RANDOMFOREST                             
## 5 ensemble   86134. ENSEMBLE (MODEL SPEC)                    
## 
## ── Final Model ──────────────────────────────────────────────────
## ℹ Model Workflow:
## ══ Workflow [trained] ══════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: linear_reg()
## 
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 0 Recipe Steps
## 
## ── Model ───────────────────────────────────────────────────────────────────────
## 
## Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "gaussian",      alpha = ~0.360970945999725) 
## 
##    Df  %Dev Lambda
## 1   0  0.00 281000
## 2   3  5.04 256100
## 3   3 11.23 233300
## 4   3 16.77 212600
## 5   3 21.72 193700
## 6   3 26.11 176500
## 7   4 30.03 160800
## 8   4 33.46 146500
## 9   4 36.45 133500
## 10  4 39.05 121700
## 11  4 41.30 110900
## 12  4 43.25 101000
## 13  4 44.95  92030
## 14  4 46.42  83860
## 15  4 47.69  76410
## 16  4 48.80  69620
## 17  3 49.78  63430
## 18  3 50.59  57800
## 19  3 51.29  52660
## 20  3 51.89  47990
## 21  3 52.41  43720
## 22  3 52.85  39840
## 23  3 53.24  36300
## 24  3 53.58  33070
## 25  3 53.87  30140
## 26  3 54.13  27460
## 27  3 54.36  25020
## 28  3 54.56  22800
## 29  3 54.74  20770
## 30  3 54.91  18930
## 31  3 55.05  17240
## 32  3 55.19  15710
## 33  3 55.31  14320
## 34  3 55.43  13050
## 35  3 55.54  11890
## 36  3 55.64  10830
## 37  2 55.71   9868
## 38  2 55.76   8992
## 39  2 55.80   8193
## 40  2 55.83   7465
## 41  2 55.86   6802
## 42  3 56.07   6198
## 43  3 56.40   5647
## 44  3 56.71   5145
## 45  3 57.00   4688
## 46  3 57.27   4272
## 
## ...
## and 46 more lines.
## 
## 5.951 sec elapsed
# Meta-learner: XGBoost stack tuning mtry, trees and learn_rate
# (default tuning grid, 5-fold CV).
ensemble_fit_xg <- resamples_fitted %>%
  ensemble_model_spec(
    model_spec = boost_tree(
      mtry       = tune(),
      trees      = tune(),
      learn_rate = tune()
    ) %>%
      set_engine("xgboost"),
    control = control_grid(verbose = TRUE)
  )
## ── Tuning Model Specification ───────────────────────────────────
## ℹ Performing 5-Fold Cross Validation.
## i Creating pre-processing data to finalize unknown parameter: mtry
## i Fold1: preprocessor 1/1
## ✓ Fold1: preprocessor 1/1
## i Fold1: preprocessor 1/1, model 1/6
## ✓ Fold1: preprocessor 1/1, model 1/6
## i Fold1: preprocessor 1/1, model 1/6 (predictions)
## i Fold1: preprocessor 1/1, model 2/6
## ✓ Fold1: preprocessor 1/1, model 2/6
## i Fold1: preprocessor 1/1, model 2/6 (predictions)
## i Fold1: preprocessor 1/1, model 3/6
## ✓ Fold1: preprocessor 1/1, model 3/6
## i Fold1: preprocessor 1/1, model 3/6 (predictions)
## i Fold1: preprocessor 1/1, model 4/6
## ✓ Fold1: preprocessor 1/1, model 4/6
## i Fold1: preprocessor 1/1, model 4/6 (predictions)
## i Fold1: preprocessor 1/1, model 5/6
## ✓ Fold1: preprocessor 1/1, model 5/6
## i Fold1: preprocessor 1/1, model 5/6 (predictions)
## i Fold1: preprocessor 1/1, model 6/6
## ✓ Fold1: preprocessor 1/1, model 6/6
## i Fold1: preprocessor 1/1, model 6/6 (predictions)
## i Fold2: preprocessor 1/1
## ✓ Fold2: preprocessor 1/1
## i Fold2: preprocessor 1/1, model 1/6
## ✓ Fold2: preprocessor 1/1, model 1/6
## i Fold2: preprocessor 1/1, model 1/6 (predictions)
## i Fold2: preprocessor 1/1, model 2/6
## ✓ Fold2: preprocessor 1/1, model 2/6
## i Fold2: preprocessor 1/1, model 2/6 (predictions)
## i Fold2: preprocessor 1/1, model 3/6
## ✓ Fold2: preprocessor 1/1, model 3/6
## i Fold2: preprocessor 1/1, model 3/6 (predictions)
## i Fold2: preprocessor 1/1, model 4/6
## ✓ Fold2: preprocessor 1/1, model 4/6
## i Fold2: preprocessor 1/1, model 4/6 (predictions)
## i Fold2: preprocessor 1/1, model 5/6
## ✓ Fold2: preprocessor 1/1, model 5/6
## i Fold2: preprocessor 1/1, model 5/6 (predictions)
## i Fold2: preprocessor 1/1, model 6/6
## ✓ Fold2: preprocessor 1/1, model 6/6
## i Fold2: preprocessor 1/1, model 6/6 (predictions)
## i Fold3: preprocessor 1/1
## ✓ Fold3: preprocessor 1/1
## i Fold3: preprocessor 1/1, model 1/6
## ✓ Fold3: preprocessor 1/1, model 1/6
## i Fold3: preprocessor 1/1, model 1/6 (predictions)
## i Fold3: preprocessor 1/1, model 2/6
## ✓ Fold3: preprocessor 1/1, model 2/6
## i Fold3: preprocessor 1/1, model 2/6 (predictions)
## i Fold3: preprocessor 1/1, model 3/6
## ✓ Fold3: preprocessor 1/1, model 3/6
## i Fold3: preprocessor 1/1, model 3/6 (predictions)
## i Fold3: preprocessor 1/1, model 4/6
## ✓ Fold3: preprocessor 1/1, model 4/6
## i Fold3: preprocessor 1/1, model 4/6 (predictions)
## i Fold3: preprocessor 1/1, model 5/6
## ✓ Fold3: preprocessor 1/1, model 5/6
## i Fold3: preprocessor 1/1, model 5/6 (predictions)
## i Fold3: preprocessor 1/1, model 6/6
## ✓ Fold3: preprocessor 1/1, model 6/6
## i Fold3: preprocessor 1/1, model 6/6 (predictions)
## i Fold4: preprocessor 1/1
## ✓ Fold4: preprocessor 1/1
## i Fold4: preprocessor 1/1, model 1/6
## ✓ Fold4: preprocessor 1/1, model 1/6
## i Fold4: preprocessor 1/1, model 1/6 (predictions)
## i Fold4: preprocessor 1/1, model 2/6
## ✓ Fold4: preprocessor 1/1, model 2/6
## i Fold4: preprocessor 1/1, model 2/6 (predictions)
## i Fold4: preprocessor 1/1, model 3/6
## ✓ Fold4: preprocessor 1/1, model 3/6
## i Fold4: preprocessor 1/1, model 3/6 (predictions)
## i Fold4: preprocessor 1/1, model 4/6
## ✓ Fold4: preprocessor 1/1, model 4/6
## i Fold4: preprocessor 1/1, model 4/6 (predictions)
## i Fold4: preprocessor 1/1, model 5/6
## ✓ Fold4: preprocessor 1/1, model 5/6
## i Fold4: preprocessor 1/1, model 5/6 (predictions)
## i Fold4: preprocessor 1/1, model 6/6
## ✓ Fold4: preprocessor 1/1, model 6/6
## i Fold4: preprocessor 1/1, model 6/6 (predictions)
## i Fold5: preprocessor 1/1
## ✓ Fold5: preprocessor 1/1
## i Fold5: preprocessor 1/1, model 1/6
## ✓ Fold5: preprocessor 1/1, model 1/6
## i Fold5: preprocessor 1/1, model 1/6 (predictions)
## i Fold5: preprocessor 1/1, model 2/6
## ✓ Fold5: preprocessor 1/1, model 2/6
## i Fold5: preprocessor 1/1, model 2/6 (predictions)
## i Fold5: preprocessor 1/1, model 3/6
## ✓ Fold5: preprocessor 1/1, model 3/6
## i Fold5: preprocessor 1/1, model 3/6 (predictions)
## i Fold5: preprocessor 1/1, model 4/6
## ✓ Fold5: preprocessor 1/1, model 4/6
## i Fold5: preprocessor 1/1, model 4/6 (predictions)
## i Fold5: preprocessor 1/1, model 5/6
## ✓ Fold5: preprocessor 1/1, model 5/6
## i Fold5: preprocessor 1/1, model 5/6 (predictions)
## i Fold5: preprocessor 1/1, model 6/6
## ✓ Fold5: preprocessor 1/1, model 6/6
## i Fold5: preprocessor 1/1, model 6/6 (predictions)
## ✓ Finished tuning Model Specification.
## ℹ Model Parameters:
## # A tibble: 1 × 9
##    mtry trees learn_rate .metric .estimator   mean     n std_err .config        
##   <int> <int>      <dbl> <chr>   <chr>       <dbl> <int>   <dbl> <chr>          
## 1     1  1762    0.00209 rmse    standard   83060.     5  13115. Preprocessor1_…
## ℹ Prediction Error Comparison:
## # A tibble: 5 × 3
##   .model_id    rmse .model_desc                              
##   <chr>       <dbl> <chr>                                    
## 1 1         128924. ARIMA(1,1,2)(2,0,0)[12] W/ XGBOOST ERRORS
## 2 2         116572. PROPHET W/ XGBOOST ERRORS                
## 3 3         121334. XGBOOST                                  
## 4 4         124179. RANDOMFOREST                             
## 5 ensemble   52578. ENSEMBLE (MODEL SPEC)                    
## 
## ── Final Model ──────────────────────────────────────────────────
## ℹ Model Workflow:
## ══ Workflow [trained] ══════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: boost_tree()
## 
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 0 Recipe Steps
## 
## ── Model ───────────────────────────────────────────────────────────────────────
## ##### xgb.Booster
## raw: 1.3 Mb 
## call:
##   xgboost::xgb.train(params = list(eta = 0.00209271548061252, max_depth = 6, 
##     gamma = 0, colsample_bytree = 1, colsample_bynode = 0.25, 
##     min_child_weight = 1, subsample = 1, objective = "reg:squarederror"), 
##     data = x$data, nrounds = 1762L, watchlist = x$watchlist, 
##     verbose = 0, nthread = 1)
## params (as set within xgb.train):
##   eta = "0.00209271548061252", max_depth = "6", gamma = "0", colsample_bytree = "1", colsample_bynode = "0.25", min_child_weight = "1", subsample = "1", objective = "reg:squarederror", nthread = "1", validate_parameters = "TRUE"
## xgb.attributes:
##   niter
## callbacks:
##   cb.evaluation.log()
## # of features: 4 
## niter: 1762
## nfeatures : 4 
## evaluation_log:
##     iter training_rmse
##        1     932120.44
##        2     930284.50
## ---                   
##     1761      52642.61
##     1762      52578.50
## 
## 22.054 sec elapsed
# Modeltime table holding the three ensembles (mean, glmnet stack,
# xgboost stack).
ensemble_tbl <- modeltime_table(
  ensemble_fit_mean,
  ensemble_fit_lm,
  ensemble_fit_xg
)

Ensemble Test Accuracy

# Test accuracy of ensembles and submodels together, best RMSE first,
# rendered as a markdown table.
ensemble_tbl %>%
  combine_modeltime_tables(submodels_tbl) %>%
  modeltime_accuracy(testing(splits)) %>%
  arrange(rmse, mae, mape, mase) %>%
  knitr::kable()
.model_id .model_desc .type mae mape mase smape rmse rsq
4 ARIMA(1,1,2)(2,0,0)[12] W/ XGBOOST ERRORS Test 32498.55 3.200357 0.5677113 3.206721 37479.12 0.8764449
1 ENSEMBLE (MEAN): 4 MODELS Test 31470.17 2.950817 0.5497468 3.046988 50382.44 0.8443752
2 ENSEMBLE (GLMNET STACK): 4 MODELS Test 51249.20 5.022606 0.8952631 5.058998 61370.95 0.3851515
3 ENSEMBLE (XGBOOST STACK): 4 MODELS Test 53505.92 5.228635 0.9346854 5.347639 63400.46 0.7831948
5 PROPHET W/ XGBOOST ERRORS Test 52131.10 5.268918 0.9106689 5.210271 64435.97 0.9630310
6 XGBOOST Test 46463.83 4.473893 0.8116684 4.667059 66341.33 0.5212406
7 RANDOMFOREST Test 55565.22 5.296326 0.9706589 5.524929 71790.80 0.9378041

Ensemble Test Forecast

# Test-window ensemble forecast with 95% confidence intervals.
# TRUE instead of T. NOTE(review): the original output warns that no 'id'
# column exists in the calibration data, so conf_by_id falls back to a
# global interval; the argument is kept for compatibility.
ensemble_tbl %>%
  modeltime_calibrate(testing(splits)) %>%
  modeltime_forecast(
    new_data      = testing(splits),
    actual_data   = data_prepared_tbl,
    keep_data     = TRUE,
    conf_by_id    = TRUE,
    conf_interval = 0.95
  ) %>%
  plot_modeltime_forecast(.facet_ncol = 2)
## Warning: The 'id' column in calibration data was not detected. Global Confidence
## Interval is being returned.
# Ensembles and submodels on the test window in a single plot.
# TRUE instead of T.
ensemble_tbl %>%
  combine_modeltime_tables(submodels_tbl) %>%
  modeltime_calibrate(testing(splits)) %>%
  modeltime_forecast(
    new_data    = testing(splits),
    actual_data = data_prepared_tbl,
    keep_data   = TRUE
  ) %>%
  plot_modeltime_forecast(.facet_ncol = 2)

# Combined table retained for later reuse.
ensemble_tbl_all_model <- ensemble_tbl %>%
  combine_modeltime_tables(submodels_tbl)

Refit Ensemble

# Refit the ensembles on the full dataset. Only the submodels are refit:
# without 'resamples' the meta-learners keep their trained coefficients.
ensemble_refit_tbl <- ensemble_tbl %>%
  modeltime_refit(data_prepared_tbl)
## frequency = 12 observations per 1 year
## Warning in mdl_time_refit.mdl_time_ensemble_model_spec(...): 'resamples' not
## provided during refitting. Submodels will be refit, but the meta-learner will
## *not* be refit. You can provide 'resamples' via `modeltime_refit(object, data,
## resamples, control)`. Proceeding by refitting the submodels only.
## frequency = 12 observations per 1 year
## Warning in mdl_time_refit.mdl_time_ensemble_model_spec(...): 'resamples' not
## provided during refitting. Submodels will be refit, but the meta-learner will
## *not* be refit. You can provide 'resamples' via `modeltime_refit(object, data,
## resamples, control)`. Proceeding by refitting the submodels only.
## frequency = 12 observations per 1 year

Visualize Ensemble Forecast

# Future-horizon ensemble forecast. TRUE instead of T.
# (No confidence intervals here: refit table was not re-calibrated —
# see the warning in the original output.)
ensemble_refit_tbl %>%
  modeltime_forecast(
    new_data    = future_tbl,
    actual_data = data_prepared_tbl,
    keep_data   = TRUE
  ) %>%
  plot_modeltime_forecast(.facet_ncol = 2)
## Warning: Expecting the following names to be in the data frame: .conf_hi, .conf_lo. 
## Proceeding with '.conf_interval_show = FALSE' to visualize the forecast without confidence intervals.
## Alternatively, try using `modeltime_calibrate()` before forecasting to add confidence intervals.

DOLARES

DATA

# Visualize the dolares series (static plot, 3 facet columns).
# FALSE instead of F.
dolares %>%
  plot_time_series(Date, value, .facet_ncol = 3, .interactive = FALSE)

DATA PREPARATION

# Forecast horizon for the dolares series: 5 future monthly periods.
FORECAST_HORIZON <- 5

Full = Training + Forecast Dataset

# Extend the dolares series FORECAST_HORIZON periods and bind the future
# rows (value = NA) to the historical data. TRUE instead of T.
full_data_tbl <- dolares %>%
  select(Date, value) %>%
  future_frame(
    .date_var   = Date,
    .length_out = FORECAST_HORIZON,
    .bind_data  = TRUE
  )

Training Data

# Training data: rows with an observed value (the appended future rows are NA).
data_prepared_tbl <- full_data_tbl %>% drop_na(value)

# Confirm the regular monthly spacing of the series.
tk_summary_diagnostics(data_prepared_tbl)
## tk_augment_timeseries_signature(): Using the following .date_var variable: Date
## # A tibble: 1 × 12
##   n.obs start      end        units scale tzone diff.minimum diff.q1 diff.median
##   <int> <date>     <date>     <chr> <chr> <chr>        <dbl>   <dbl>       <dbl>
## 1   246 2001-02-01 2021-07-01 days  month UTC        2419200 2592000     2678400
## # … with 3 more variables: diff.mean <dbl>, diff.q3 <dbl>, diff.maximum <dbl>

Future Data Forecast

# Future rows to forecast: the NA-valued placeholder dates.
future_tbl <- filter(full_data_tbl, is.na(value))

SPLITTING

# Hold out the final FORECAST_HORIZON observations for testing.
# FIX: the argument was misspelled `data_var`; time_series_split() has no such
# argument, so it silently fell into `...` and the date column was auto-detected
# instead (hence the "Using date_var: Date" message). Spelled correctly here.
splits <- data_prepared_tbl %>%
  arrange(Date) %>%
  time_series_split(
    date_var   = Date,
    assess     = FORECAST_HORIZON,
    cumulative = TRUE
  )

splits
## <Analysis/Assess/Total>
## <241/5/246>

PREPROCESSOR

# Recipe 1: calendar-signature features derived from Date.
#  - step_rm drops granularities below monthly (day/hour/.../week) and
#    redundant iso/xts columns that add no signal for a monthly series
#  - trend terms are normalized; month becomes an ordered factor, one-hot coded
# NOTE(review): the dots in the regex are unescaped, so e.g. "(.iso$)" matches
# any character before "iso" — harmless here, but "\\." would be stricter.
recipe_spec_1 <- recipe(value ~ ., training(splits)) %>%
  step_timeseries_signature(Date) %>%
  step_rm(matches("(.iso$)|(.xts)|(day)|(hour)|(minute)|(second)|(am.pm)|(week)")) %>%
  step_normalize(Date_index.num, Date_year) %>%
  step_mutate(Date_month = factor(Date_month, ordered = TRUE)) %>%  # FIX: TRUE not T
  step_dummy(all_nominal(), one_hot = TRUE)                         # FIX: TRUE not T

recipe_spec_1 %>% prep() %>% juice() %>% glimpse()
## Rows: 241
## Columns: 30
## $ Date              <date> 2001-02-01, 2001-03-01, 2001-04-01, 2001-05-01, 200…
## $ value             <dbl> 21.8817, 24.1889, 24.6323, 30.7223, 30.6749, 31.9302…
## $ Date_index.num    <dbl> -1.720507, -1.707312, -1.692703, -1.678565, -1.66395…
## $ Date_year         <dbl> -1.656391, -1.656391, -1.656391, -1.656391, -1.65639…
## $ Date_half         <int> 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2…
## $ Date_quarter      <int> 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1, 1, 1, 2, 2, 2, 3…
## $ Date_month_01     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ Date_month_02     <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Date_month_03     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ Date_month_04     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Date_month_05     <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Date_month_06     <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Date_month_07     <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Date_month_08     <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_09     <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_10     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_11     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_12     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_01 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_02 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Date_month.lbl_03 <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ Date_month.lbl_04 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Date_month.lbl_05 <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Date_month.lbl_06 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Date_month.lbl_07 <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Date_month.lbl_08 <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_09 <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_10 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_11 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_12 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
# Recipe 2: same features, but Date demoted from predictor to an "ID" role so
# engines that cannot handle date columns (xgboost, randomForest, svm) skip it.
recipe_spec_2 <- update_role(recipe_spec_1, Date, new_role = "ID")

glimpse(juice(prep(recipe_spec_2)))
## Rows: 241
## Columns: 30
## $ Date              <date> 2001-02-01, 2001-03-01, 2001-04-01, 2001-05-01, 200…
## $ value             <dbl> 21.8817, 24.1889, 24.6323, 30.7223, 30.6749, 31.9302…
## $ Date_index.num    <dbl> -1.720507, -1.707312, -1.692703, -1.678565, -1.66395…
## $ Date_year         <dbl> -1.656391, -1.656391, -1.656391, -1.656391, -1.65639…
## $ Date_half         <int> 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2…
## $ Date_quarter      <int> 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1, 1, 1, 2, 2, 2, 3…
## $ Date_month_01     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ Date_month_02     <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Date_month_03     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ Date_month_04     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Date_month_05     <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Date_month_06     <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Date_month_07     <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Date_month_08     <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_09     <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_10     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_11     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month_12     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_01 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_02 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ Date_month.lbl_03 <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ Date_month.lbl_04 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ Date_month.lbl_05 <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ Date_month.lbl_06 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Date_month.lbl_07 <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Date_month.lbl_08 <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_09 <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_10 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_11 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Date_month.lbl_12 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
# Inspect variable roles for recipe 1: Date is still a predictor here.
recipe_spec_1 %>% prep() %>% summary()
## # A tibble: 30 × 4
##    variable       type    role      source  
##    <chr>          <chr>   <chr>     <chr>   
##  1 Date           date    predictor original
##  2 value          numeric outcome   original
##  3 Date_index.num numeric predictor derived 
##  4 Date_year      numeric predictor derived 
##  5 Date_half      numeric predictor derived 
##  6 Date_quarter   numeric predictor derived 
##  7 Date_month_01  numeric predictor derived 
##  8 Date_month_02  numeric predictor derived 
##  9 Date_month_03  numeric predictor derived 
## 10 Date_month_04  numeric predictor derived 
## # … with 20 more rows
# Inspect variable roles for recipe 2: Date now carries the "ID" role.
recipe_spec_2 %>% prep() %>% summary()
## # A tibble: 30 × 4
##    variable       type    role      source  
##    <chr>          <chr>   <chr>     <chr>   
##  1 Date           date    ID        original
##  2 value          numeric outcome   original
##  3 Date_index.num numeric predictor derived 
##  4 Date_year      numeric predictor derived 
##  5 Date_half      numeric predictor derived 
##  6 Date_quarter   numeric predictor derived 
##  7 Date_month_01  numeric predictor derived 
##  8 Date_month_02  numeric predictor derived 
##  9 Date_month_03  numeric predictor derived 
## 10 Date_month_04  numeric predictor derived 
## # … with 20 more rows

MODELS

autoarima xgboost

# Auto-ARIMA with XGBoost-modeled errors, trained on the date-aware recipe.
autoarima_boost_spec <- arima_boost(min_n = 2, learn_rate = 0.015) %>%
  set_engine(engine = "auto_arima_xgboost")

wflw_fit_autoarima_boost <- workflow() %>%
  add_recipe(recipe_spec_1) %>%
  add_model(autoarima_boost_spec) %>%
  fit(training(splits))
## frequency = 12 observations per 1 year

prophet

# Prophet with default seasonality settings.
wflw_fit_prophet <- workflow() %>%
  add_recipe(recipe_spec_1) %>%
  add_model(set_engine(prophet_reg(), "prophet")) %>%
  fit(training(splits))
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.

XGBOOST

# XGBoost with learning rate 0.015.
# FIX: the value was 0.15, contradicting the `_0_015` suffix (the sibling
# models' suffixes all match their learn_rate). Aligned to 0.015.
wflw_fit_xgboost_0_015 <- workflow() %>%
  add_model(
    boost_tree(learn_rate = 0.015) %>% set_engine("xgboost")
  ) %>%
  add_recipe(recipe_spec_2) %>%
  fit(training(splits))

# XGBoost with learning rate 0.1.
wflw_fit_xgboost_0_1 <- workflow() %>%
  add_recipe(recipe_spec_2) %>%
  add_model(set_engine(boost_tree(learn_rate = 0.1), "xgboost")) %>%
  fit(training(splits))

# XGBoost with learning rate 0.3.
wflw_fit_xgboost_0_3 <- workflow() %>%
  add_recipe(recipe_spec_2) %>%
  add_model(set_engine(boost_tree(learn_rate = 0.3), "xgboost")) %>%
  fit(training(splits))

Random Forest

# Random forest, 1000 trees.
rf_spec_1000 <- rand_forest(trees = 1000) %>% set_engine("randomForest")

wflw_fit_rf_1000 <- workflow() %>%
  add_recipe(recipe_spec_2) %>%
  add_model(rf_spec_1000) %>%
  fit(training(splits))

# Random forest, 500 trees.
rf_spec_500 <- rand_forest(trees = 500) %>% set_engine("randomForest")

wflw_fit_rf_500 <- workflow() %>%
  add_recipe(recipe_spec_2) %>%
  add_model(rf_spec_500) %>%
  fit(training(splits))

# Random forest, 200 trees.
rf_spec_200 <- rand_forest(trees = 200) %>% set_engine("randomForest")

wflw_fit_rf_200 <- workflow() %>%
  add_recipe(recipe_spec_2) %>%
  add_model(rf_spec_200) %>%
  fit(training(splits))

SVM

# Radial-basis support vector machine.
wflw_fit_svm <- workflow() %>%
  add_recipe(recipe_spec_2) %>%
  add_model(set_engine(svm_rbf(), "kernlab")) %>%
  fit(training(splits))

prophet_boost

# Prophet with XGBoost-modeled errors; Prophet's own seasonalities are
# disabled so the xgboost component handles the recipe's calendar features.
# FIX: removed the trailing comma after `seasonality_daily` (empty argument
# in the call) and spelled out FALSE instead of F.
wflw_fit_prophet_boost <- workflow() %>%
  add_model(
    prophet_boost(
      seasonality_yearly = FALSE,
      seasonality_weekly = FALSE,
      seasonality_daily  = FALSE
    ) %>%
      set_engine("prophet_xgboost")
  ) %>%
  add_recipe(recipe_spec_1) %>%
  fit(training(splits))

MODELTIME WORKFLOW

modeltime table

# Collect the fitted submodels into a single modeltime table.
# The commented-out entries were evaluated but excluded from the final set;
# the trailing #N tags record each candidate's original position.
submodels_tbl <- modeltime_table(
  wflw_fit_autoarima_boost,
  #wflw_fit_prophet, #1
  wflw_fit_prophet_boost, #2
  #wflw_fit_xgboost_0_015, #3
  #wflw_fit_xgboost_0_1, #4
  wflw_fit_xgboost_0_3, #5
  #wflw_fit_rf_1000, #6
  wflw_fit_rf_500 #, #7
  #wflw_fit_rf_200, #8
  #wflw_fit_svm #9
)

submodels_tbl
## # Modeltime Table
## # A tibble: 4 × 3
##   .model_id .model     .model_desc                                         
##       <int> <list>     <chr>                                               
## 1         1 <workflow> ARIMA(0,1,0)(1,0,0)[12] WITH DRIFT W/ XGBOOST ERRORS
## 2         2 <workflow> PROPHET W/ XGBOOST ERRORS                           
## 3         3 <workflow> XGBOOST                                             
## 4         4 <workflow> RANDOMFOREST

Calibrate on Testing Data

# Calibrate every submodel on the hold-out test split (stores residuals used
# for accuracy metrics and confidence intervals).
submodels_calibrated_tbl <- modeltime_calibrate(submodels_tbl, testing(splits))

submodels_calibrated_tbl
## # Modeltime Table
## # A tibble: 4 × 5
##   .model_id .model     .model_desc                        .type .calibration_da…
##       <int> <list>     <chr>                              <chr> <list>          
## 1         1 <workflow> ARIMA(0,1,0)(1,0,0)[12] WITH DRIF… Test  <tibble [5 × 4]>
## 2         2 <workflow> PROPHET W/ XGBOOST ERRORS          Test  <tibble [5 × 4]>
## 3         3 <workflow> XGBOOST                            Test  <tibble [5 × 4]>
## 4         4 <workflow> RANDOMFOREST                       Test  <tibble [5 × 4]>

Measure Test Accuracy

# Test-set accuracy, best model (lowest RMSE) first.
arrange(modeltime_accuracy(submodels_calibrated_tbl), rmse)
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
## # A tibble: 4 × 9
##   .model_id .model_desc              .type   mae  mape  mase smape  rmse     rsq
##       <int> <chr>                    <chr> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1         1 ARIMA(0,1,0)(1,0,0)[12]… Test   66.6  3.76 0.774  3.81  75.8  0.792 
## 2         2 PROPHET W/ XGBOOST ERRO… Test  118.   6.53 1.37   6.83 141.   0.0919
## 3         3 XGBOOST                  Test  168.   9.34 1.95   9.93 192.  NA     
## 4         4 RANDOMFOREST             Test  311.  17.6  3.62  19.4  322.   0.463

Visualize test forecast

# Visualize the test-window forecasts against the actual series.
test_fcast_tbl <- modeltime_forecast(
  submodels_calibrated_tbl,
  new_data    = testing(splits),
  actual_data = data_prepared_tbl,
  keep_data   = TRUE
)
plot_modeltime_forecast(test_fcast_tbl, .facet_ncol = 2)

Refit on full training dataset

# Refit each calibrated submodel on the full (train + test) dataset before
# forecasting the true future window.
submodels_refit_tbl <- submodels_calibrated_tbl %>%
  modeltime_refit(data_prepared_tbl)
## frequency = 12 observations per 1 year

Visualize Submodel Forecast

# Visualize the refit submodels' forecasts over the future horizon.
submodel_future_fcast <- modeltime_forecast(
  submodels_refit_tbl,
  new_data    = future_tbl,
  actual_data = data_prepared_tbl,
  keep_data   = TRUE
)
plot_modeltime_forecast(submodel_future_fcast, .facet_ncol = 2)

Validación cruzada

https://cran.r-project.org/web/packages/modeltime.resample/vignettes/getting-started.html

# Rolling-origin cross-validation: up to 5 slices, 36-month training window,
# FORECAST_HORIZON-month assessment window, stepping by the horizon each slice.
resamples_tscv <- data_prepared_tbl %>%
  time_series_cv(
    date_var    = Date,
    initial     = "36 month",
    assess      = FORECAST_HORIZON,
    skip        = FORECAST_HORIZON,
    slice_limit = 5
  )

resamples_tscv
## # Time Series Cross Validation Plan 
## # A tibble: 5 × 2
##   splits         id    
##   <list>         <chr> 
## 1 <split [36/5]> Slice1
## 2 <split [36/5]> Slice2
## 3 <split [36/5]> Slice3
## 4 <split [36/5]> Slice4
## 5 <split [36/5]> Slice5
# Visualize the cross-validation plan.
# FIX: spelled out TRUE instead of the reassignable shorthand T.
resamples_tscv %>%
  tk_time_series_cv_plan() %>%
  plot_time_series_cv_plan(Date,
                           value,
                           .facet_ncol = 2,
                           .interactive = TRUE)
Generate Resample Predictions
# Fit every submodel on each CV slice and collect out-of-sample predictions.
resamples_fitted <- modeltime_fit_resamples(
  submodels_tbl,
  resamples = resamples_tscv,
  control   = control_resamples(verbose = FALSE)
)

resamples_fitted
## # Modeltime Table
## # A tibble: 4 × 4
##   .model_id .model     .model_desc                              .resample_resul…
##       <int> <list>     <chr>                                    <list>          
## 1         1 <workflow> ARIMA(0,1,0)(1,0,0)[12] WITH DRIFT W/ X… <rsmp[+]>       
## 2         2 <workflow> PROPHET W/ XGBOOST ERRORS                <rsmp[+]>       
## 3         3 <workflow> XGBOOST                                  <rsmp[+]>       
## 4         4 <workflow> RANDOMFOREST                             <rsmp[+]>
Evaluate the Results
# Compare resampled accuracy across models.
# FIX: spelled out TRUE instead of the reassignable shorthand T.
resamples_fitted %>%
    plot_modeltime_resamples(
      .point_size  = 3,
      .point_alpha = 0.8,
      .interactive = TRUE
    )
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
# Mean accuracy across CV slices, rendered as an interactive table.
# FIX: spelled out TRUE instead of the reassignable shorthand T.
resamples_fitted %>%
    modeltime_resample_accuracy(summary_fns = mean) %>%
    table_modeltime_accuracy(.interactive = TRUE)
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.

ENSEMBLE

Ensamble Media y Meta-Learner

# Simple average ensemble over every submodel in the table.
# The commented filter records an experiment that excluded model 1.
ensemble_fit_mean <- submodels_tbl %>%
  #filter(!.model_id %in% c(1))%>%
  ensemble_average(type="mean")


# Stacked ensemble: tune an elastic-net (glmnet) meta-learner on the
# submodels' resample predictions (grid of 2 candidates).
glmnet_meta_spec <- linear_reg(penalty = tune(), mixture = tune()) %>%
  set_engine("glmnet")

ensemble_fit_lm <- ensemble_model_spec(
  resamples_fitted,
  model_spec = glmnet_meta_spec,
  grid       = 2,
  control    = control_grid(verbose = TRUE)
)
## ── Tuning Model Specification ───────────────────────────────────
## ℹ Performing 5-Fold Cross Validation.
## i Fold1: preprocessor 1/1
## ✓ Fold1: preprocessor 1/1
## i Fold1: preprocessor 1/1, model 1/2
## ✓ Fold1: preprocessor 1/1, model 1/2
## i Fold1: preprocessor 1/1, model 1/2 (predictions)
## i Fold1: preprocessor 1/1, model 2/2
## ✓ Fold1: preprocessor 1/1, model 2/2
## i Fold1: preprocessor 1/1, model 2/2 (predictions)
## i Fold2: preprocessor 1/1
## ✓ Fold2: preprocessor 1/1
## i Fold2: preprocessor 1/1, model 1/2
## ✓ Fold2: preprocessor 1/1, model 1/2
## i Fold2: preprocessor 1/1, model 1/2 (predictions)
## i Fold2: preprocessor 1/1, model 2/2
## ✓ Fold2: preprocessor 1/1, model 2/2
## i Fold2: preprocessor 1/1, model 2/2 (predictions)
## i Fold3: preprocessor 1/1
## ✓ Fold3: preprocessor 1/1
## i Fold3: preprocessor 1/1, model 1/2
## ✓ Fold3: preprocessor 1/1, model 1/2
## i Fold3: preprocessor 1/1, model 1/2 (predictions)
## i Fold3: preprocessor 1/1, model 2/2
## ✓ Fold3: preprocessor 1/1, model 2/2
## i Fold3: preprocessor 1/1, model 2/2 (predictions)
## i Fold4: preprocessor 1/1
## ✓ Fold4: preprocessor 1/1
## i Fold4: preprocessor 1/1, model 1/2
## ✓ Fold4: preprocessor 1/1, model 1/2
## i Fold4: preprocessor 1/1, model 1/2 (predictions)
## i Fold4: preprocessor 1/1, model 2/2
## ✓ Fold4: preprocessor 1/1, model 2/2
## i Fold4: preprocessor 1/1, model 2/2 (predictions)
## i Fold5: preprocessor 1/1
## ✓ Fold5: preprocessor 1/1
## i Fold5: preprocessor 1/1, model 1/2
## ✓ Fold5: preprocessor 1/1, model 1/2
## i Fold5: preprocessor 1/1, model 1/2 (predictions)
## i Fold5: preprocessor 1/1, model 2/2
## ✓ Fold5: preprocessor 1/1, model 2/2
## i Fold5: preprocessor 1/1, model 2/2 (predictions)
## ✓ Finished tuning Model Specification.
## ℹ Model Parameters:
## # A tibble: 1 × 8
##    penalty mixture .metric .estimator  mean     n std_err .config             
##      <dbl>   <dbl> <chr>   <chr>      <dbl> <int>   <dbl> <chr>               
## 1 0.000174   0.416 rmse    standard    104.     5    14.0 Preprocessor1_Model1
## ℹ Prediction Error Comparison:
## # A tibble: 5 × 3
##   .model_id  rmse .model_desc                                         
##   <chr>     <dbl> <chr>                                               
## 1 1         178.  ARIMA(0,1,0)(1,0,0)[12] WITH DRIFT W/ XGBOOST ERRORS
## 2 2         159.  PROPHET W/ XGBOOST ERRORS                           
## 3 3         189.  XGBOOST                                             
## 4 4         206.  RANDOMFOREST                                        
## 5 ensemble   73.6 ENSEMBLE (MODEL SPEC)                               
## 
## ── Final Model ──────────────────────────────────────────────────
## ℹ Model Workflow:
## ══ Workflow [trained] ══════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: linear_reg()
## 
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 0 Recipe Steps
## 
## ── Model ───────────────────────────────────────────────────────────────────────
## 
## Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "gaussian",      alpha = ~0.415619745833101) 
## 
##    Df  %Dev Lambda
## 1   0  0.00 515.90
## 2   3  9.36 470.10
## 3   3 18.66 428.30
## 4   4 27.19 390.30
## 5   4 34.81 355.60
## 6   4 41.47 324.00
## 7   4 47.27 295.20
## 8   4 52.32 269.00
## 9   4 56.69 245.10
## 10  4 60.47 223.30
## 11  4 63.72 203.50
## 12  4 66.52 185.40
## 13  4 68.93 168.90
## 14  4 70.99 153.90
## 15  4 72.77 140.30
## 16  4 74.29 127.80
## 17  4 75.59 116.40
## 18  4 76.71 106.10
## 19  4 77.68  96.68
## 20  4 78.52  88.09
## 21  4 79.24  80.26
## 22  4 79.88  73.13
## 23  4 80.43  66.64
## 24  4 80.93  60.72
## 25  3 81.33  55.32
## 26  3 81.64  50.41
## 27  3 81.90  45.93
## 28  3 82.13  41.85
## 29  3 82.32  38.13
## 30  3 82.49  34.74
## 31  3 82.63  31.66
## 32  3 82.75  28.85
## 33  3 82.86  26.28
## 34  3 82.95  23.95
## 35  3 83.03  21.82
## 36  3 83.10  19.88
## 37  3 83.16  18.12
## 38  3 83.22  16.51
## 39  3 83.27  15.04
## 40  3 83.32  13.70
## 41  3 83.36  12.49
## 42  3 83.39  11.38
## 43  3 83.43  10.37
## 44  3 83.46   9.45
## 45  4 83.68   8.61
## 46  4 84.20   7.84
## 
## ...
## and 52 more lines.
## 
## 5.595 sec elapsed
# Stacked ensemble: tune an xgboost meta-learner on the submodels'
# resample predictions (default tuning grid).
xgb_meta_spec <- boost_tree(
  mtry       = tune(),
  trees      = tune(),
  learn_rate = tune()
) %>%
  set_engine("xgboost")

ensemble_fit_xg <- ensemble_model_spec(
  resamples_fitted,
  model_spec = xgb_meta_spec,
  control    = control_grid(verbose = TRUE)
)
## ── Tuning Model Specification ───────────────────────────────────
## ℹ Performing 5-Fold Cross Validation.
## i Creating pre-processing data to finalize unknown parameter: mtry
## i Fold1: preprocessor 1/1
## ✓ Fold1: preprocessor 1/1
## i Fold1: preprocessor 1/1, model 1/6
## ✓ Fold1: preprocessor 1/1, model 1/6
## i Fold1: preprocessor 1/1, model 1/6 (predictions)
## i Fold1: preprocessor 1/1, model 2/6
## ✓ Fold1: preprocessor 1/1, model 2/6
## i Fold1: preprocessor 1/1, model 2/6 (predictions)
## i Fold1: preprocessor 1/1, model 3/6
## ✓ Fold1: preprocessor 1/1, model 3/6
## i Fold1: preprocessor 1/1, model 3/6 (predictions)
## i Fold1: preprocessor 1/1, model 4/6
## ✓ Fold1: preprocessor 1/1, model 4/6
## i Fold1: preprocessor 1/1, model 4/6 (predictions)
## i Fold1: preprocessor 1/1, model 5/6
## ✓ Fold1: preprocessor 1/1, model 5/6
## i Fold1: preprocessor 1/1, model 5/6 (predictions)
## i Fold1: preprocessor 1/1, model 6/6
## ✓ Fold1: preprocessor 1/1, model 6/6
## i Fold1: preprocessor 1/1, model 6/6 (predictions)
## i Fold2: preprocessor 1/1
## ✓ Fold2: preprocessor 1/1
## i Fold2: preprocessor 1/1, model 1/6
## ✓ Fold2: preprocessor 1/1, model 1/6
## i Fold2: preprocessor 1/1, model 1/6 (predictions)
## i Fold2: preprocessor 1/1, model 2/6
## ✓ Fold2: preprocessor 1/1, model 2/6
## i Fold2: preprocessor 1/1, model 2/6 (predictions)
## i Fold2: preprocessor 1/1, model 3/6
## ✓ Fold2: preprocessor 1/1, model 3/6
## i Fold2: preprocessor 1/1, model 3/6 (predictions)
## i Fold2: preprocessor 1/1, model 4/6
## ✓ Fold2: preprocessor 1/1, model 4/6
## i Fold2: preprocessor 1/1, model 4/6 (predictions)
## i Fold2: preprocessor 1/1, model 5/6
## ✓ Fold2: preprocessor 1/1, model 5/6
## i Fold2: preprocessor 1/1, model 5/6 (predictions)
## i Fold2: preprocessor 1/1, model 6/6
## ✓ Fold2: preprocessor 1/1, model 6/6
## i Fold2: preprocessor 1/1, model 6/6 (predictions)
## i Fold3: preprocessor 1/1
## ✓ Fold3: preprocessor 1/1
## i Fold3: preprocessor 1/1, model 1/6
## ✓ Fold3: preprocessor 1/1, model 1/6
## i Fold3: preprocessor 1/1, model 1/6 (predictions)
## i Fold3: preprocessor 1/1, model 2/6
## ✓ Fold3: preprocessor 1/1, model 2/6
## i Fold3: preprocessor 1/1, model 2/6 (predictions)
## i Fold3: preprocessor 1/1, model 3/6
## ✓ Fold3: preprocessor 1/1, model 3/6
## i Fold3: preprocessor 1/1, model 3/6 (predictions)
## i Fold3: preprocessor 1/1, model 4/6
## ✓ Fold3: preprocessor 1/1, model 4/6
## i Fold3: preprocessor 1/1, model 4/6 (predictions)
## i Fold3: preprocessor 1/1, model 5/6
## ✓ Fold3: preprocessor 1/1, model 5/6
## i Fold3: preprocessor 1/1, model 5/6 (predictions)
## i Fold3: preprocessor 1/1, model 6/6
## ✓ Fold3: preprocessor 1/1, model 6/6
## i Fold3: preprocessor 1/1, model 6/6 (predictions)
## i Fold4: preprocessor 1/1
## ✓ Fold4: preprocessor 1/1
## i Fold4: preprocessor 1/1, model 1/6
## ✓ Fold4: preprocessor 1/1, model 1/6
## i Fold4: preprocessor 1/1, model 1/6 (predictions)
## i Fold4: preprocessor 1/1, model 2/6
## ✓ Fold4: preprocessor 1/1, model 2/6
## i Fold4: preprocessor 1/1, model 2/6 (predictions)
## i Fold4: preprocessor 1/1, model 3/6
## ✓ Fold4: preprocessor 1/1, model 3/6
## i Fold4: preprocessor 1/1, model 3/6 (predictions)
## i Fold4: preprocessor 1/1, model 4/6
## ✓ Fold4: preprocessor 1/1, model 4/6
## i Fold4: preprocessor 1/1, model 4/6 (predictions)
## i Fold4: preprocessor 1/1, model 5/6
## ✓ Fold4: preprocessor 1/1, model 5/6
## i Fold4: preprocessor 1/1, model 5/6 (predictions)
## i Fold4: preprocessor 1/1, model 6/6
## ✓ Fold4: preprocessor 1/1, model 6/6
## i Fold4: preprocessor 1/1, model 6/6 (predictions)
## i Fold5: preprocessor 1/1
## ✓ Fold5: preprocessor 1/1
## i Fold5: preprocessor 1/1, model 1/6
## ✓ Fold5: preprocessor 1/1, model 1/6
## i Fold5: preprocessor 1/1, model 1/6 (predictions)
## i Fold5: preprocessor 1/1, model 2/6
## ✓ Fold5: preprocessor 1/1, model 2/6
## i Fold5: preprocessor 1/1, model 2/6 (predictions)
## i Fold5: preprocessor 1/1, model 3/6
## ✓ Fold5: preprocessor 1/1, model 3/6
## i Fold5: preprocessor 1/1, model 3/6 (predictions)
## i Fold5: preprocessor 1/1, model 4/6
## ✓ Fold5: preprocessor 1/1, model 4/6
## i Fold5: preprocessor 1/1, model 4/6 (predictions)
## i Fold5: preprocessor 1/1, model 5/6
## ✓ Fold5: preprocessor 1/1, model 5/6
## i Fold5: preprocessor 1/1, model 5/6 (predictions)
## i Fold5: preprocessor 1/1, model 6/6
## ✓ Fold5: preprocessor 1/1, model 6/6
## i Fold5: preprocessor 1/1, model 6/6 (predictions)
## ✓ Finished tuning Model Specification.
## ℹ Model Parameters:
## # A tibble: 1 × 9
##    mtry trees learn_rate .metric .estimator  mean     n std_err .config         
##   <int> <int>      <dbl> <chr>   <chr>      <dbl> <int>   <dbl> <chr>           
## 1     2  1026     0.0661 rmse    standard    85.2     5    10.1 Preprocessor1_M…
## ℹ Prediction Error Comparison:
## # A tibble: 5 × 3
##   .model_id      rmse .model_desc                                         
##   <chr>         <dbl> <chr>                                               
## 1 1         178.      ARIMA(0,1,0)(1,0,0)[12] WITH DRIFT W/ XGBOOST ERRORS
## 2 2         159.      PROPHET W/ XGBOOST ERRORS                           
## 3 3         189.      XGBOOST                                             
## 4 4         206.      RANDOMFOREST                                        
## 5 ensemble    0.00117 ENSEMBLE (MODEL SPEC)                               
## 
## ── Final Model ──────────────────────────────────────────────────
## ℹ Model Workflow:
## ══ Workflow [trained] ══════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: boost_tree()
## 
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 0 Recipe Steps
## 
## ── Model ───────────────────────────────────────────────────────────────────────
## ##### xgb.Booster
## raw: 1.1 Mb 
## call:
##   xgboost::xgb.train(params = list(eta = 0.0661406338030049, max_depth = 6, 
##     gamma = 0, colsample_bytree = 1, colsample_bynode = 0.5, 
##     min_child_weight = 1, subsample = 1, objective = "reg:squarederror"), 
##     data = x$data, nrounds = 1026L, watchlist = x$watchlist, 
##     verbose = 0, nthread = 1)
## params (as set within xgb.train):
##   eta = "0.0661406338030049", max_depth = "6", gamma = "0", colsample_bytree = "1", colsample_bynode = "0.5", min_child_weight = "1", subsample = "1", objective = "reg:squarederror", nthread = "1", validate_parameters = "TRUE"
## xgb.attributes:
##   niter
## callbacks:
##   cb.evaluation.log()
## # of features: 4 
## niter: 1026
## nfeatures : 4 
## evaluation_log:
##     iter training_rmse
##        1   1345.790771
##        2   1262.893677
## ---                   
##     1025      0.001168
##     1026      0.001168
## 
## 20.662 sec elapsed
# Collect the three ensembles (mean, glmnet stack, xgboost stack).
ensemble_tbl <- modeltime_table(ensemble_fit_mean, ensemble_fit_lm, ensemble_fit_xg)

Ensemble test Accuracy

# Test accuracy for ensembles and submodels together, best models first.
combine_modeltime_tables(ensemble_tbl, submodels_tbl) %>%
  modeltime_accuracy(testing(splits)) %>%
  arrange(rmse, mae, mape, mase) %>%
  knitr::kable()
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
.model_id .model_desc .type mae mape mase smape rmse rsq
4 ARIMA(0,1,0)(1,0,0)[12] WITH DRIFT W/ XGBOOST ERRORS Test 66.62842 3.755342 0.7740205 3.805410 75.78838 0.7921893
3 ENSEMBLE (XGBOOST STACK): 4 MODELS Test 91.92276 5.117643 1.0678641 5.268277 105.26510 0.4953768
5 PROPHET W/ XGBOOST ERRORS Test 118.03429 6.525348 1.3712011 6.833269 141.17847 0.0918866
2 ENSEMBLE (GLMNET STACK): 4 MODELS Test 150.57993 8.410085 1.7492828 8.794916 168.50830 0.8190488
1 ENSEMBLE (MEAN): 4 MODELS Test 153.85243 8.552809 1.7872993 9.045627 175.44023 0.6300985
6 XGBOOST Test 168.01570 9.335913 1.9518337 9.930132 192.05816 NA
7 RANDOMFOREST Test 311.34186 17.576806 3.6168497 19.370058 322.35502 0.4626032

Ensemble Test Forecast

# Test-window forecast for the ensembles with 95% confidence intervals.
# FIX: dropped `conf_by_id = TRUE` — the calibration data carries no id
# column, so the argument was ignored with a warning and a global interval
# was returned anyway. Also spelled out TRUE instead of T.
ensemble_tbl %>%
  modeltime_calibrate(testing(splits)) %>%
  modeltime_forecast(
    new_data      = testing(splits),
    actual_data   = data_prepared_tbl,
    keep_data     = TRUE,
    conf_interval = 0.95
  ) %>%
  plot_modeltime_forecast(
    .facet_ncol = 2
  )
## Warning: The 'id' column in calibration data was not detected. Global Confidence
## Interval is being returned.
# Forecast the test window with every model (ensembles + submodels).
all_models_tbl <- combine_modeltime_tables(ensemble_tbl, submodels_tbl)

all_models_tbl %>%
  modeltime_calibrate(testing(splits)) %>%
  modeltime_forecast(
    new_data    = testing(splits),
    actual_data = data_prepared_tbl,
    keep_data   = TRUE
  ) %>%
  plot_modeltime_forecast(.facet_ncol = 2)

# Keep the combined table for later reuse.
ensemble_tbl_all_model <- all_models_tbl

Refit Ensemble

# Refit the ensembles on the full prepared dataset (train + test).
# NOTE(review): without a `resamples` argument only the submodels are refit;
# the stacked meta-learners keep their previously tuned coefficients (see the
# warnings below). Pass `resamples` to modeltime_refit() if the meta-learners
# should be re-trained as well.
ensemble_refit_tbl <- ensemble_tbl%>%
  modeltime_refit(data_prepared_tbl)
## frequency = 12 observations per 1 year
## Warning in mdl_time_refit.mdl_time_ensemble_model_spec(...): 'resamples' not
## provided during refitting. Submodels will be refit, but the meta-learner will
## *not* be refit. You can provide 'resamples' via `modeltime_refit(object, data,
## resamples, control)`. Proceeding by refitting the submodels only.
## frequency = 12 observations per 1 year
## Warning in mdl_time_refit.mdl_time_ensemble_model_spec(...): 'resamples' not
## provided during refitting. Submodels will be refit, but the meta-learner will
## *not* be refit. You can provide 'resamples' via `modeltime_refit(object, data,
## resamples, control)`. Proceeding by refitting the submodels only.
## frequency = 12 observations per 1 year

Visualize Ensemble Forecast

# Final future-horizon forecasts from the refit ensembles.
# (No calibration is applied here, so no confidence intervals are drawn.)
final_fcast_tbl <- modeltime_forecast(
  ensemble_refit_tbl,
  new_data    = future_tbl,
  actual_data = data_prepared_tbl,
  keep_data   = TRUE
)
plot_modeltime_forecast(final_fcast_tbl, .facet_ncol = 2)
## Warning: Expecting the following names to be in the data frame: .conf_hi, .conf_lo. 
## Proceeding with '.conf_interval_show = FALSE' to visualize the forecast without confidence intervals.
## Alternatively, try using `modeltime_calibrate()` before forecasting to add confidence intervals.